#!/bin/sed -f

# Lexer for Unlambda

# Usage:  $ cat program.unl | sed -f lexer.sed > program.k.unl


# Written for GNU sed.


# This lexer converts an Unlambda program into a K-readable equivalent.
# * Comments and extra whitespace are removed.
# * Digraph "print" and "compare" combinators are now expressed as
#     ".[integer]" and "?[integer]" respectively, where "[integer]"
#     designates the integer value of the corresponding character.
# * Undefined combinators are indicated as an "UNDEF" token.
# * The combinator application token "`" is replaced by ">".
# * Introduces spaces between adjacent tokens.
# * Treats a "." or "?" at the end of any line (including the last) as preceding a newline.





# Add spaces after digraph combinators, to facilitate comment removal
# (after this, the second character of a digraph is always followed by a space,
# so a "#" or whitespace character directly after "." or "?" is digraph data)
s/[.?]./& /g


# Remove comments
# A "#" starts a comment unless it directly follows "." or "?".
s/\(^\|[^.?]\)#.*/\1/


# Convert whitespace to space
# Whole runs are squeezed at once.  Matching one whitespace character at a
# time together with the character before it skips every other character of
# a run, which left stray tabs behind that were later reported as UNDEF.
# A whitespace character directly after "." or "?" is never the target of a
# match, so the character of a digraph is preserved.
s/^\s\+/ /
s/\([^.?]\)\s\+/\1 /g


# Add spaces after remaining commands (adds extra space after digraphs, but ok)
s/[^.?]/& /g


# Mark undefined
# A character that starts a token (at the start of the line or right after a
# space) and is not a space, a known combinator, the apply token, or the
# start of a digraph is replaced by UNDEF.
s/\( \|^\)[^ .?`ksivcdre@|]/\1UNDEF/g


# Convert the apply token (backquote) to > (cannot be implemented directly in K)
# Only a backquote that starts a token is converted.  A backquote directly
# after . or ? is the character of a digraph and has to stay as it is, so
# that the \x60 digraph rule encodes it as 96 and not as 62, the code of >.
s/\(^\| \)`/\1>/g



# Convert characters in digraphs to integers
#
# Every rule rewrites a "." or "?" followed by one character into the same
# "." or "?" followed by the decimal value of that character.
# The order of the rules matters, see the notes on the first two groups.

#numerals
# These run first because they are the only rules that match digits, and
# every replacement below consists of digits.  They are written with a
# leading 0 so that a later numeral rule cannot match the digits produced by
# an earlier one; the last rule of this section strips that 0 again.
s/\([.?]\)\x30/\1048/g
s/\([.?]\)\x31/\1049/g
s/\([.?]\)\x32/\1050/g
s/\([.?]\)\x33/\1051/g
s/\([.?]\)\x34/\1052/g
s/\([.?]\)\x35/\1053/g
s/\([.?]\)\x36/\1054/g
s/\([.?]\)\x37/\1055/g
s/\([.?]\)\x38/\1056/g
s/\([.?]\)\x39/\1057/g

#. and ?
# These must run before the space rule (\x20).  The preprocessing step puts a
# space after every digraph, so as long as the second character of "..",
# ".?", "?." or "??" is unconverted, that character plus the following space
# is matched by the space rule and the digraph comes out as e.g. ".4632".
s/\([.?]\)\./\146/g
s/\([.?]\)\x3f/\163/g

#NUL is written as 00 because the zero-stripping rule at the end removes one 0
s/\([.?]\)\x0/\100/g
s/\([.?]\)\x1/\11/g
s/\([.?]\)\x2/\12/g
s/\([.?]\)\x3/\13/g
s/\([.?]\)\x4/\14/g
s/\([.?]\)\x5/\15/g
s/\([.?]\)\x6/\16/g
s/\([.?]\)\x7/\17/g
s/\([.?]\)\x8/\18/g
s/\([.?]\)\x9/\19/g
#newline
# (a "." or "?" with nothing after it on the line takes the newline that
# ended the line as its character)
s/\([.?]\)$/\110/g
s/\([.?]\)\xa/\110/g
s/\([.?]\)\xb/\111/g
s/\([.?]\)\xc/\112/g
s/\([.?]\)\xd/\113/g
s/\([.?]\)\xe/\114/g
s/\([.?]\)\xf/\115/g
s/\([.?]\)\x10/\116/g
s/\([.?]\)\x11/\117/g
s/\([.?]\)\x12/\118/g
s/\([.?]\)\x13/\119/g
s/\([.?]\)\x14/\120/g
s/\([.?]\)\x15/\121/g
s/\([.?]\)\x16/\122/g
s/\([.?]\)\x17/\123/g
s/\([.?]\)\x18/\124/g
s/\([.?]\)\x19/\125/g
s/\([.?]\)\x1a/\126/g
s/\([.?]\)\x1b/\127/g
s/\([.?]\)\x1c/\128/g
s/\([.?]\)\x1d/\129/g
s/\([.?]\)\x1e/\130/g
s/\([.?]\)\x1f/\131/g
s/\([.?]\)\x20/\132/g
s/\([.?]\)\x21/\133/g
s/\([.?]\)\x22/\134/g
s/\([.?]\)\x23/\135/g
#$
s/\([.?]\)\$/\136/g
s/\([.?]\)\x25/\137/g
s/\([.?]\)\x26/\138/g
s/\([.?]\)\x27/\139/g
s/\([.?]\)\x28/\140/g
s/\([.?]\)\x29/\141/g
#*
s/\([.?]\)\*/\142/g
s/\([.?]\)\x2b/\143/g
s/\([.?]\)\x2c/\144/g
s/\([.?]\)\x2d/\145/g
s/\([.?]\)\x2f/\147/g
s/\([.?]\)\x3a/\158/g
s/\([.?]\)\x3b/\159/g
s/\([.?]\)\x3c/\160/g
s/\([.?]\)\x3d/\161/g
s/\([.?]\)\x3e/\162/g
s/\([.?]\)\x40/\164/g
s/\([.?]\)\x41/\165/g
s/\([.?]\)\x42/\166/g
s/\([.?]\)\x43/\167/g
s/\([.?]\)\x44/\168/g
s/\([.?]\)\x45/\169/g
s/\([.?]\)\x46/\170/g
s/\([.?]\)\x47/\171/g
s/\([.?]\)\x48/\172/g
s/\([.?]\)\x49/\173/g
s/\([.?]\)\x4a/\174/g
s/\([.?]\)\x4b/\175/g
s/\([.?]\)\x4c/\176/g
s/\([.?]\)\x4d/\177/g
s/\([.?]\)\x4e/\178/g
s/\([.?]\)\x4f/\179/g
s/\([.?]\)\x50/\180/g
s/\([.?]\)\x51/\181/g
s/\([.?]\)\x52/\182/g
s/\([.?]\)\x53/\183/g
s/\([.?]\)\x54/\184/g
s/\([.?]\)\x55/\185/g
s/\([.?]\)\x56/\186/g
s/\([.?]\)\x57/\187/g
s/\([.?]\)\x58/\188/g
s/\([.?]\)\x59/\189/g
s/\([.?]\)\x5a/\190/g
#[
s/\([.?]\)\[/\191/g
#\
s/\([.?]\)\\/\192/g
#]
s/\([.?]\)\]/\193/g
#^
s/\([.?]\)\^/\194/g
s/\([.?]\)\x5f/\195/g
s/\([.?]\)\x60/\196/g
s/\([.?]\)\x61/\197/g
s/\([.?]\)\x62/\198/g
s/\([.?]\)\x63/\199/g
s/\([.?]\)\x64/\1100/g
s/\([.?]\)\x65/\1101/g
s/\([.?]\)\x66/\1102/g
s/\([.?]\)\x67/\1103/g
s/\([.?]\)\x68/\1104/g
s/\([.?]\)\x69/\1105/g
s/\([.?]\)\x6a/\1106/g
s/\([.?]\)\x6b/\1107/g
s/\([.?]\)\x6c/\1108/g
s/\([.?]\)\x6d/\1109/g
s/\([.?]\)\x6e/\1110/g
s/\([.?]\)\x6f/\1111/g
s/\([.?]\)\x70/\1112/g
s/\([.?]\)\x71/\1113/g
s/\([.?]\)\x72/\1114/g
s/\([.?]\)\x73/\1115/g
s/\([.?]\)\x74/\1116/g
s/\([.?]\)\x75/\1117/g
s/\([.?]\)\x76/\1118/g
s/\([.?]\)\x77/\1119/g
s/\([.?]\)\x78/\1120/g
s/\([.?]\)\x79/\1121/g
s/\([.?]\)\x7a/\1122/g
s/\([.?]\)\x7b/\1123/g
s/\([.?]\)\x7c/\1124/g
s/\([.?]\)\x7d/\1125/g
s/\([.?]\)\x7e/\1126/g
s/\([.?]\)\x7f/\1127/g
s/\([.?]\)\x80/\1128/g
s/\([.?]\)\x81/\1129/g
s/\([.?]\)\x82/\1130/g
s/\([.?]\)\x83/\1131/g
s/\([.?]\)\x84/\1132/g
s/\([.?]\)\x85/\1133/g
s/\([.?]\)\x86/\1134/g
s/\([.?]\)\x87/\1135/g
s/\([.?]\)\x88/\1136/g
s/\([.?]\)\x89/\1137/g
s/\([.?]\)\x8a/\1138/g
s/\([.?]\)\x8b/\1139/g
s/\([.?]\)\x8c/\1140/g
s/\([.?]\)\x8d/\1141/g
s/\([.?]\)\x8e/\1142/g
s/\([.?]\)\x8f/\1143/g
s/\([.?]\)\x90/\1144/g
s/\([.?]\)\x91/\1145/g
s/\([.?]\)\x92/\1146/g
s/\([.?]\)\x93/\1147/g
s/\([.?]\)\x94/\1148/g
s/\([.?]\)\x95/\1149/g
s/\([.?]\)\x96/\1150/g
s/\([.?]\)\x97/\1151/g
s/\([.?]\)\x98/\1152/g
s/\([.?]\)\x99/\1153/g
s/\([.?]\)\x9a/\1154/g
s/\([.?]\)\x9b/\1155/g
s/\([.?]\)\x9c/\1156/g
s/\([.?]\)\x9d/\1157/g
s/\([.?]\)\x9e/\1158/g
s/\([.?]\)\x9f/\1159/g
s/\([.?]\)\xa0/\1160/g
s/\([.?]\)\xa1/\1161/g
s/\([.?]\)\xa2/\1162/g
s/\([.?]\)\xa3/\1163/g
s/\([.?]\)\xa4/\1164/g
s/\([.?]\)\xa5/\1165/g
s/\([.?]\)\xa6/\1166/g
s/\([.?]\)\xa7/\1167/g
s/\([.?]\)\xa8/\1168/g
s/\([.?]\)\xa9/\1169/g
s/\([.?]\)\xaa/\1170/g
s/\([.?]\)\xab/\1171/g
s/\([.?]\)\xac/\1172/g
s/\([.?]\)\xad/\1173/g
s/\([.?]\)\xae/\1174/g
s/\([.?]\)\xaf/\1175/g
s/\([.?]\)\xb0/\1176/g
s/\([.?]\)\xb1/\1177/g
s/\([.?]\)\xb2/\1178/g
s/\([.?]\)\xb3/\1179/g
s/\([.?]\)\xb4/\1180/g
s/\([.?]\)\xb5/\1181/g
s/\([.?]\)\xb6/\1182/g
s/\([.?]\)\xb7/\1183/g
s/\([.?]\)\xb8/\1184/g
s/\([.?]\)\xb9/\1185/g
s/\([.?]\)\xba/\1186/g
s/\([.?]\)\xbb/\1187/g
s/\([.?]\)\xbc/\1188/g
s/\([.?]\)\xbd/\1189/g
s/\([.?]\)\xbe/\1190/g
s/\([.?]\)\xbf/\1191/g
s/\([.?]\)\xc0/\1192/g
s/\([.?]\)\xc1/\1193/g
s/\([.?]\)\xc2/\1194/g
s/\([.?]\)\xc3/\1195/g
s/\([.?]\)\xc4/\1196/g
s/\([.?]\)\xc5/\1197/g
s/\([.?]\)\xc6/\1198/g
s/\([.?]\)\xc7/\1199/g
s/\([.?]\)\xc8/\1200/g
s/\([.?]\)\xc9/\1201/g
s/\([.?]\)\xca/\1202/g
s/\([.?]\)\xcb/\1203/g
s/\([.?]\)\xcc/\1204/g
s/\([.?]\)\xcd/\1205/g
s/\([.?]\)\xce/\1206/g
s/\([.?]\)\xcf/\1207/g
s/\([.?]\)\xd0/\1208/g
s/\([.?]\)\xd1/\1209/g
s/\([.?]\)\xd2/\1210/g
s/\([.?]\)\xd3/\1211/g
s/\([.?]\)\xd4/\1212/g
s/\([.?]\)\xd5/\1213/g
s/\([.?]\)\xd6/\1214/g
s/\([.?]\)\xd7/\1215/g
s/\([.?]\)\xd8/\1216/g
s/\([.?]\)\xd9/\1217/g
s/\([.?]\)\xda/\1218/g
s/\([.?]\)\xdb/\1219/g
s/\([.?]\)\xdc/\1220/g
s/\([.?]\)\xdd/\1221/g
s/\([.?]\)\xde/\1222/g
s/\([.?]\)\xdf/\1223/g
s/\([.?]\)\xe0/\1224/g
s/\([.?]\)\xe1/\1225/g
s/\([.?]\)\xe2/\1226/g
s/\([.?]\)\xe3/\1227/g
s/\([.?]\)\xe4/\1228/g
s/\([.?]\)\xe5/\1229/g
s/\([.?]\)\xe6/\1230/g
s/\([.?]\)\xe7/\1231/g
s/\([.?]\)\xe8/\1232/g
s/\([.?]\)\xe9/\1233/g
s/\([.?]\)\xea/\1234/g
s/\([.?]\)\xeb/\1235/g
s/\([.?]\)\xec/\1236/g
s/\([.?]\)\xed/\1237/g
s/\([.?]\)\xee/\1238/g
s/\([.?]\)\xef/\1239/g
s/\([.?]\)\xf0/\1240/g
s/\([.?]\)\xf1/\1241/g
s/\([.?]\)\xf2/\1242/g
s/\([.?]\)\xf3/\1243/g
s/\([.?]\)\xf4/\1244/g
s/\([.?]\)\xf5/\1245/g
s/\([.?]\)\xf6/\1246/g
s/\([.?]\)\xf7/\1247/g
s/\([.?]\)\xf8/\1248/g
s/\([.?]\)\xf9/\1249/g
s/\([.?]\)\xfa/\1250/g
s/\([.?]\)\xfb/\1251/g
s/\([.?]\)\xfc/\1252/g
s/\([.?]\)\xfd/\1253/g
s/\([.?]\)\xfe/\1254/g
s/\([.?]\)\xff/\1255/g

#remove zeroes
# (strips the leading 0 added by the numeral rules and one 0 of the NUL rule)
s/\([.?]\)0/\1/g




# Whitespace management
# Drop lines that hold nothing but spaces, then trim the leading spaces and
# squeeze every remaining run of spaces down to a single space.
/^ *$/d
s/^ \+//
s/ \{2,\}/ /g
